R function called read.echo() to parse the input echo files and extract: * time * strain * strain rate * radial strain * radial strain rate * volume * segment volume
read.echo <- function(infile){
# Read in the file into a single vector
con <- readLines(infile)
# Instantiate vectors
frame.time <- vector()
time.progression <- vector()
strain <- vector()
strain.rate <- vector()
radial.strain <- vector()
radial.strain.rate <- vector()
volume <- vector()
seg.vol <- vector()
for(i in 1:length(con)){
if(con[i]=="FrameTime(ms)"){
frame.time <- strsplit(con[i+1], split="\t")[[1]]
}
if(con[i]=="TimeProgression(ms)"){
time.progression <- strsplit(con[i+1], split="\t")[[1]]
}
if(con[i]=="Strain (%)"){
j <- 1
while(con[i+j] != ""){
strain <- rbind(strain, strsplit(con[i+j], split="\t")[[1]])
j <- j + 1
}
}
if(con[i]=="StrainRate (1/s)"){
j <- 1
while(con[i+j] != ""){
strain.rate <- rbind(strain.rate, strsplit(con[i+j], split="\t")[[1]])
j <- j + 1
}
}
if(con[i]=="RadialStrain (%)"){
j <- 1
while(con[i+j] != ""){
radial.strain <- rbind(radial.strain, strsplit(con[i+j], split="\t")[[1]])
j <- j + 1
}
}
if(con[i]=="RadialStrainRate (1/s)"){
j <- 1
while(con[i+j] != ""){
radial.strain.rate <- rbind(radial.strain.rate, strsplit(con[i+j], split="\t")[[1]])
j <- j + 1
}
}
if(con[i]=="Vol (ml)"){
volume <- strsplit(con[i+1], split="\t")[[1]]
}
if(con[i]=="Seg_Vol (ml)"){
j <- 1
while(con[i+j] != ""){
seg.vol <- rbind(seg.vol, strsplit(con[i+j], split="\t")[[1]])
j <- j + 1
}
}
}
class(frame.time) <- "numeric"
class(time.progression) <- "numeric"
class(strain) <- "numeric"
class(strain.rate) <- "numeric"
class(radial.strain) <- "numeric"
class(radial.strain.rate) <- "numeric"
class(volume) <- "numeric"
class(seg.vol) <- "numeric"
return(list(frame.time=(frame.time),
time.progression=(time.progression),
strain=t(strain),
strain.rate=t(strain.rate),
radial.strain=t(radial.strain),
radial.strain.rate=t(radial.strain.rate),
volume=(volume),
seg.vol=(seg.vol)))
}
# Read in the echos
echo1 <- read.echo("~/Projects/partho/data/pborsuk2c.txt")
echo2 <- read.echo("~/Projects/partho/data/pborsuk4c.txt")
# Get number of time points
nTimepoints <- dim(echo1$strain)[1]
nProbes <- dim(echo1$strain)[2]
Generating by using multivariate normal distribution with means and covariance matrix from given time series.
library(MASS)
set.seed(10)
# From first echo example
r1 <- mvrnorm(nProbes, mu=apply(echo1$strain, 1, mean), Sigma=cov(t(echo1$strain)))
plot(x=echo1$time.progression, y=r1[1,], ylim=c(min(r1),max(r1)),type="n", xlab="Time (ms)", ylab="Strain", main="Random Echo Generated from Echo 1")
apply(r1, 1, lines, x=echo1$time.progression)
## NULL
# From second echo example
r2 <- mvrnorm(nProbes, mu=apply(echo2$strain, 1, mean), Sigma=cov(t(echo2$strain)))
plot(x=echo2$time.progression, y=r2[1,], ylim=c(min(r2),max(r2)),type="n", xlab="Time (ms)", ylab="Strain",main="Random Echo Generated from Echo 2")
apply(r2, 1, lines, x=echo2$time.progression)
## NULL
The first echo has time data for 92 time points; the second has data for 93 time points. We can drop the last time point to facilitate analysis, if required later.
#plot(echo1$time.progression/echo2$time.progression[1:length(echo1$time.progression)])
t1 <- echo1$time.progression
t2 <- echo2$time.progression[1:length(echo1$time.progression)] #drop last time
The trace of the covariance matrix is equal to the total variance.
library(psych)
st1 <- echo1$strain
st2 <- echo2$strain
tr(cov(st1))
## [1] 3748.301
tr(cov(st2))
## [1] 1564.79
Uses the rTensor package.
We create multidimensional arrays and then fill them in with simulated data with the following structure:
i = sample index (1 to n samples) j = index of LV point where strain is measured k = time
TODO: Make sure R x C are in correct order.
nSamples <- 25
# Set dimensions for an i by j by k tensor
# i = strain probe index
# j = time point index
# k = sample index
tensorDim1 <- c(dim(echo1$strain)[2], dim(echo1$strain)[1], nSamples)
samp1 <- array(data=NA, dim=tensorDim1)
for(i in 1:nSamples){
samp1[, ,i] <- mvrnorm(nProbes, mu=apply(echo1$strain, 1, mean), Sigma=cov(t(echo1$strain)))
}
#Plot simulated data
# Probe 1, 25 individuals
plot(samp1[1,,1],type="l", ylim=c(min(samp1[1,,]),max(samp1[1,,]) ))
for(i in 2:nSamples){ lines(samp1[1,,i])}
# Probe 2, 25 individuals
plot(samp1[2,,1],type="l", ylim=c(min(samp1[2,,]),max(samp1[2,,]) ))
for(i in 2:nSamples){ lines(samp1[2,,i])}
# Creating tensor 2
tensorDim2 <- c(dim(echo2$strain)[2], dim(echo2$strain)[1], nSamples)
samp2 <- array(data=NA, dim=tensorDim2)
for(i in 1:nSamples){
samp2[, ,i] <- mvrnorm(nProbes, mu=apply(echo2$strain, 1, mean), Sigma=cov(t(echo2$strain)))
}
library(rTensor)
tnsr1 <- as.tensor(samp1)
tnsr2 <- as.tensor(samp2)
ep1 <- prcomp(echo1$strain, scale=TRUE) # 70% by pc1
ep2 <- prcomp(echo2$strain, scale=TRUE) # 70% by pc1
ep1 <- prcomp(echo1$strain, scale=FALSE) # 70% by pc1
ep2 <- prcomp(echo2$strain, scale=FALSE) # 70% by pc1
#summary(ep1)
#summary(ep2)
#plot(ep1$rotation[,1], ep1$rotation[,2])
#plot(ep2$rotation[,1], ep2$rotation[,2])
plot(ep1$sdev, type="b",xlab="Principal component",ylab="Variance",main="Echo 1")
plot(ep2$sdev, type="b",xlab="Principal component",ylab="Variance",main="Echo 2")
plot(ep1$x[,1],type="l",xlab="Time",ylab="Rotated Data",main="Echo 1")
lines(ep1$x[,2],col="red")
lines(ep1$x[,3],col="blue")
lines(ep1$x[,4],col="green")
plot(ep2$x[,1],type="l",xlab="Time",ylab="Rotated Data",main="Echo 2")
lines(ep2$x[,2],col="red")
lines(ep2$x[,3],col="blue")
lines(ep2$x[,4],col="green")
sr1 <- echo1$strain
sr2 <- echo2$strain
ep1 <- prcomp(echo1$strain.rate, scale=TRUE) # 70% by pc1
ep2 <- prcomp(echo2$strain.rate, scale=TRUE) # 70% by pc1
ep1 <- prcomp(echo1$strain.rate, scale=FALSE) # 70% by pc1
ep2 <- prcomp(echo2$strain.rate, scale=FALSE) # 70% by pc1
#summary(ep1)
#summary(ep2)
#plot(ep1$rotation[,1], ep1$rotation[,2])
#plot(ep2$rotation[,1], ep2$rotation[,2])
plot(ep1$sdev, type="b",xlab="Principal component",ylab="Variance",main="Echo 1")
plot(ep2$sdev, type="b",xlab="Principal component",ylab="Variance",main="Echo 2")
plot(ep1$x[,1],type="l",xlab="Time",ylab="Rotated Data",main="Echo 1")
lines(ep1$x[,2],col="red")
lines(ep1$x[,3],col="blue")
lines(ep1$x[,4],col="green")
plot(ep2$x[,1],type="l",xlab="Time",ylab="Rotated Data",main="Echo 2")
lines(ep2$x[,2],col="red")
lines(ep2$x[,3],col="blue")
lines(ep2$x[,4],col="green")
# Total variances, actual data
tr(cov(st1))
## [1] 3748.301
tr(cov(st2))
## [1] 1564.79
# Total variances, simulated data
totvar1 <- numeric()
totvar2 <- numeric()
for(i in 1:nSamples){
totvar1[i] <- tr(cov(t(samp1[,,i])))
totvar2[i] <- tr(cov(t(samp2[,,i])))
}
boxplot(totvar1, totvar2,names=c("Sample ","Sample 2"),ylab="Total Variance")
rmsd <- function(positions){
# Input must be data frame with rows=times
# and columns=probe values (strains, strain rates, etc.)
outputdf <- numeric(length=dim(positions)[1])
for(i in 1:nrow(positions)){
outputdf[i] <- sqrt(sum( (positions[i,] - positions[1,])^2 ) / length(positions[i,]))
}
return(rmsd=outputdf)
}
rmsd1 <- rmsd(echo1$strain)
rmsd2 <- rmsd(echo2$strain)
plot(rmsd1,type="l",xlab="Time",ylab="RMS Deviation", main="Echo 1 RMSD")
plot(rmsd2,type="l",xlab="Time",ylab="RMS Deviation", main="Echo 2 RMSD")
# RMSF is taken as average per particle, not per time (which is RMSD)
rmsf <- function(positions){
# Input must be data frame with rows=times
# and columns=probe values (strains, strain rates, etc.)
outputdf <- numeric(length=dim(positions)[2])
for(i in 1:ncol(positions)){
outputdf[i] <- sqrt(sum( (positions[,i] - positions[,1])^2 ) / length(positions[,i]))
}
return(rmsf=outputdf)
}
rmsf1 <- rmsf(echo1$strain)
rmsf2 <- rmsf(echo2$strain)
plot(rmsf1,type="l",xlab="Position Index",ylab="RMS Fluctuation",main="Echo 1 RMSF")
plot(rmsf2,type="l",xlab="Position Index",ylab="RMS Fluctuation",main="Echo 2 RMSF")
Model the sample RMSDs as ARIMA processes, and simulate from those ARIMA models.
library(forecast)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Loading required package: timeDate
## This is forecast 6.2
m2 <- auto.arima(rmsd2)
simulate_RMSDs <- function(RMSDvec, n=100){
m1 <- auto.arima(RMSDvec)
nSimRMSD <- n; RMSDmat <- matrix(data=NA, nrow=length(RMSDvec), ncol=nSimRMSD)
for(i in 1:nSimRMSD){
RMSDmat[,i] <- simulate.Arima(m1) }
return(RMSDmat)
}
This is an example of the randomly generated sequences.
randRMSDs1 <- simulate_RMSDs(rmsd1,n=10)
randRMSDs2 <- simulate_RMSDs(rmsd2, n=10)
plot(randRMSDs1[,1],type="l",ylim=c(min(randRMSDs1), max(randRMSDs1) ),xlab="Time Index",ylab="RMSD",main="Randomly Generated RMSDs")
for(i in 1:dim(randRMSDs1)[2]){ lines(randRMSDs1[,i],type="l", col=i) }
# Total variance of RMSDs
tr(cov(randRMSDs1))
## [1] 419.3347
tr(cov(randRMSDs2))
## [1] 614.6101
# Generalized Variance of RMSDs
det(cov(randRMSDs1))
## [1] 1.014382e+12
det(cov(randRMSDs2))
## [1] 460300028
eigs1 <- eigen(cov(randRMSDs1))
eigs2 <- eigen(cov(randRMSDs2))
plot(eigs1$values, type="b",ylab="Eigenvalue",xlab="Eigenvalue Index",main="Echo 1 Simulations")
plot(eigs2$values, type="b",ylab="Eigenvalue",xlab="Eigenvalue Index",main="Echo 2 Simulations")
plot(eigs1$vectors, xlab="PC1", ylab="PC2",main="Eigenvectors of RMSD Trajectories, Sim. 1")
plot(eigs2$vectors, xlab="PC1", ylab="PC2",main="Eigenvectors of RMSD Trajectories, Sim. 2")
cor(eigs1$vectors)
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1.000000000 0.07102367 -0.016744181 0.06039411 -0.12254644
## [2,] 0.071023669 1.00000000 0.040121311 -0.14471241 0.29363778
## [3,] -0.016744181 0.04012131 1.000000000 0.03411666 -0.06922656
## [4,] 0.060394106 -0.14471241 0.034116665 1.00000000 0.24969128
## [5,] -0.122546444 0.29363778 -0.069226556 0.24969128 1.00000000
## [6,] -0.034320759 0.08223716 -0.019387817 0.06992936 -0.14189455
## [7,] -0.023337405 0.05591956 -0.013183314 0.04755052 -0.09648535
## [8,] 0.006211853 -0.01488444 0.003509079 -0.01265680 0.02568207
## [9,] 0.093719534 -0.22456462 0.052942218 -0.19095577 0.38747077
## [10,] -0.052274792 0.12525744 -0.029530060 0.10651113 -0.21612307
## [,6] [,7] [,8] [,9] [,10]
## [1,] -0.034320759 -0.023337405 0.006211853 0.09371953 -0.05227479
## [2,] 0.082237160 0.055919565 -0.014884435 -0.22456462 0.12525744
## [3,] -0.019387817 -0.013183314 0.003509079 0.05294222 -0.02953006
## [4,] 0.069929360 0.047550516 -0.012656797 -0.19095577 0.10651113
## [5,] -0.141894549 -0.096485353 0.025682067 0.38747077 -0.21612307
## [6,] 1.000000000 -0.027022005 0.007192604 0.10851634 -0.06052814
## [7,] -0.027022005 1.000000000 0.004890822 0.07378886 -0.04115788
## [8,] 0.007192604 0.004890822 1.000000000 -0.01964081 0.01095523
## [9,] 0.108516335 0.073788859 -0.019640809 1.00000000 0.16528390
## [10,] -0.060528138 -0.041157879 0.010955232 0.16528390 1.00000000